package com.gxuwz.crawlers;

import java.util.ArrayList;
import java.util.List;

import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.gxuwz.dao.AcademyDao;
import com.gxuwz.model.Academy;
import com.gxuwz.model.Const;

/**
 * Crawls the college-region links from a Tieba directory page using jsoup.
 * 
 * @author h
 * 
 */
public class AcademyJsoupCrawler {

	/** Address of the directory page whose links are harvested. */
	private static final String DIRECTORY_URL = "http://tieba.baidu.com/f/fdir?fd=%B8%DF%B5%C8%D4%BA%D0%A3&sd=%B1%B1%BE%A9%D4%BA%D0%A3";

	/**
	 * Parses the directory page and collects one {@link Academy} per link found.
	 * <p>
	 * The page is fetched through {@code BasicJsoupCrawler.start}; only anchors
	 * that carry an {@code href} attribute and are matched under the
	 * {@code [class=root_dir_box]} selector are considered. Ids are assigned
	 * sequentially, starting at 1, in the order the links are returned.
	 * 
	 * @return List<Academy> the collected entries; empty when no link matches
	 */
	public static List<Academy> getAcademys() {
		List<Academy> result = new ArrayList<Academy>();
		Elements anchors = BasicJsoupCrawler.start(DIRECTORY_URL)
				.select("[class=root_dir_box]")
				.select("a[href]");
		for (int index = 0; index < anchors.size(); index++) {
			// Ids are 1-based, hence the offset from the list index.
			result.add(toAcademy(anchors.get(index), index + 1));
		}
		return result;
	}

	/**
	 * Builds a single {@link Academy} from one anchor element.
	 * 
	 * @param anchor the link element supplying the name (its text) and the href
	 * @param id     the identifier to assign to the new entry
	 * @return the populated entry
	 */
	private static Academy toAcademy(Element anchor, int id) {
		Academy entry = new Academy();
		entry.setId(id);
		entry.setAcademyName(anchor.text());
		// The href is appended to the site prefix as-is, without normalization.
		entry.setAcademyURL(Const.SITE + anchor.attr("href"));
		return entry;
	}

	/**
	 * Crawls the page and hands the resulting list to {@code AcademyDao.save}
	 * for writing to the database.
	 */
	public static void save() {
		List<Academy> crawled = getAcademys();
		AcademyDao.save(crawled);
	}

}
